import pandas as pd
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import os

# Elbow method: the key metric is the sum of squared errors (SSE),
# collected below as one value per candidate cluster count.
SSE = []

# Input data: one row is appended per file found in the data directory.
data = []
path = './resources/data'
# os.listdir returns names in arbitrary order, so sort them to get a stable
# row order. The key drops the last five characters of each name
# (presumably the file extension -- TODO confirm against the real files).
files = sorted(os.listdir(path), key=lambda name: name[:-5])
print(files)

# Build one feature vector per file: every comma-separated integer in the
# file is flattened, in reading order, into a single row of `data`.
for file_name in files:
    features = []
    # The `with` statement closes the file; no explicit close() is needed.
    with open(os.path.join(path, file_name), encoding="utf-8") as fp:
        # The first line is skipped (presumably a header row -- confirm).
        # The default of None keeps an empty file from raising StopIteration.
        next(fp, None)
        for line in fp:
            stripped = line.strip()
            if not stripped:
                # A blank (e.g. trailing) line would otherwise make
                # int('') raise ValueError.
                continue
            features.extend(int(value) for value in stripped.split(','))
    data.append(features)

# Fit KMeans once for each cluster count from 1 to 8 and record its SSE.
for n_clusters in range(1, 9):
    model = KMeans(n_clusters=n_clusters).fit(data)
    # inertia_ is the sum of squared distances from each sample to its
    # closest cluster center, i.e. the SSE used by the elbow method.
    SSE.append(model.inertia_)

# Plot SSE against the cluster count; the "elbow" of the curve suggests k.
# X must cover the same cluster counts that were fitted (1 through 8).
X = range(1, 9)
plt.plot(X, SSE, 'o-')
plt.ylabel('SSE')
plt.xlabel('k')
plt.show()
